import os
import glob as glob
import pdb
import json

# Default locations used when pinyin_all() is called without arguments.
_DEFAULT_INFO_FILE = (
    '/opt/tiger/arnold_test/workstation/data/single_speaker/'
    'data_biaobei/ProsodyLabeling/000001-010000.txt'
)
_DEFAULT_JSON_FILE = 'biaobei_pinyin_to_id_dict.json'

# Module-level pinyin -> integer id table; rebuilt in place by pinyin_all().
pinyin_to_id = {}


def pinyin_all(info_file=_DEFAULT_INFO_FILE,
               pinyin_to_id_json_file=_DEFAULT_JSON_FILE):
    """Build the pinyin vocabulary from a labeling file and save it as JSON.

    The input is read as consecutive two-line records: a label line followed
    by a line of whitespace-separated pinyin tokens.  Each distinct token is
    given the next free integer id, starting at 1, in order of first
    appearance.  Two extra entries, ``'UNK'`` and ``'EOS'``, receive the two
    ids that follow the last token id.

    Reading stops at end of file or at the first record whose label line or
    pinyin line is empty.  A trailing unpaired label line is ignored.

    Args:
        info_file: Path of the UTF-8 labeling file to read.
        pinyin_to_id_json_file: Path of the JSON file to (over)write.

    Returns:
        The module-level ``pinyin_to_id`` dict, which is cleared and refilled
        on every call so repeated calls give the same ids.

    Raises:
        OSError: If the input cannot be opened or the output cannot be
            written.
    """
    mapping = {}
    next_id = 1

    with open(info_file, encoding='utf-8') as labeling_file:
        # zip() over the same iterator yields (label line, pinyin line) pairs.
        for info_line, pinyin_line in zip(labeling_file, labeling_file):
            if not info_line.strip('\n') or not pinyin_line.strip('\n'):
                break
            # split() with no separator ignores the leading indent character
            # and never yields empty tokens for doubled or trailing spaces.
            for pinyin in pinyin_line.split():
                if pinyin not in mapping:
                    mapping[pinyin] = next_id
                    next_id += 1

    # Special symbols always take the two ids after the last real token.
    mapping['UNK'] = next_id
    mapping['EOS'] = next_id + 1

    # Update the shared dict in place so existing references see the result,
    # and drop entries left over from an earlier call.
    pinyin_to_id.clear()
    pinyin_to_id.update(mapping)

    with open(pinyin_to_id_json_file, 'w', encoding='utf-8') as json_file:
        json.dump(pinyin_to_id, json_file)

    return pinyin_to_id

# Script entry point: build and save the vocabulary only when this file is
# executed directly, not when it is imported as a module.
if __name__ == "__main__":
    pinyin_all()
